** TITLE **
** Charlotte Cavaille
** Kris-Stella Trump

*** replication code 

*********************
*** The datasets
*** The raw BSAS data can be downloaded from the uk data service website http://ukdataservice.ac.uk/
*** the raw ESS data can be downloaded from the ESS website www.europeansocialsurvey.org  
*** Note: There are no BSAS cumulative file and the researcher needs to merge all the waves into one dataset.
*** We provide three datasets for the replication. One with the full BSAS longitudinal data, one with the BSAS 2004 data,
*** and a final one with the ESS 2008 data. 
*** For questions, please email cavaille@fas.harvard.edu
*********************


************************************************************
************************** Part XXX.1.b  *********************
************************************************************


*************************
*** Table 1. ************
*************************
**** Dataset : ess_2008_for_rep

global COUNTRIES "FR SE GB DE" 
foreach country in $COUNTRIES {

preserve

keep if cntry == "`country'"

*** to compute the polychoric correlation matrix for sem use the ssd_ess.do file
*** we advise that you keep it in a separate .do file.

do "/Users/XXXXXX/ssd.do"


sem (WELF -> sblazy sblwlka sblwcoa gvslvue uentrjb bennent prtsick gvslvue) ///
(EQUA ->  gincdif_n smdfslv_n dfincac) (GOV ->  gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr), ///
stand cov(e.gvpdlwk*e.gvcldcr)  cov(e.gvslvol*e.gvhlthc) cov(e.sblwlka*e.sblwcoa) ///
cov(e.uentrjb*e.bennent) cov(e.prtsick*e.bennent) cov(e.uentrjb*e.prtsick) 

estat gof, stats(all)
estat mindices


} 

clear all 

restore 

}

*************************
**** Model fitting code, see appendix for results
*************************


****
*** Example below is with French data (see results and comments in the appendix) 
*****



clear all
use "/Users/XXX/ess_2008_for_rep.dta"

keep if cntry == "FR"

*** saturated model 

sem (<- sblazy sblwlka sblwcoa uentrjb bennent prtsick gincdif_n smdfslv_n dfincac gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr), stand 

estat gof, stats(all)

*** AIC for France is 58478

*** M1: unidimensional hypothesis

sem (UNI -> sblazy sblwlka sblwcoa uentrjb bennent prtsick gincdif_n smdfslv_n dfincac gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr), stand

estat gof, stats(all)

*** AIC is 62067

*** M2 : two dimensions 

sem (WELF -> sblazy sblwlka sblwcoa uentrjb bennent prtsick) (GOVEQUA ->  gincdif_n smdfslv_n dfincac gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr), stand

estat gof, stats(all)

*** AIC is 59808, sharp improvement

*** M3: three dimensions 

sem (WELF -> sblazy sblwlka sblwcoa uentrjb bennent prtsick) (EQUA ->  gincdif_n smdfslv_n dfincac) (GOV ->  gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr), stand

estat gof, stats(all)

*** AIC is 59365, the improvement is less striking than when moving from one to two dimensions. 

*** we are still far from the baseline AIC, step one is to look at the modification indices to see what are the sources of misfit. Misfit can come either
*** from an item being correlated with one of the other components/dimensions or from the fact that the item-specific errors of two items are correlated, 
*** because of similar wording for instance

estat mindices

*** we find five sources of misfit with very large modification indices: 1) sblwlka and sblwcoa have shared variance most likely due to a similar wording
*** 2) govpdlw and gvcldcr as well as 3) gvslvol and gvhlthc, also have shared variance. This can be expected as support for government 
*** provided health care and old age pension is in countries like France close to universal. Support for new needs such as 
*** childcare or income loss due to one's caretaker responsability, is on the other hand less consensual (see Silja Hausermann's work on these issues) . 
*** In terms of error correlation, one might expect support for public pensions to be highly correlated with support for a public provision of healthcare.
*** The other two policies on the other hand might better capture commitment to government income protection beyond the core constituted by pension and healthcare.
*** capturing one's commitment to having the welfare state address income insecurity more generally. 
*** An additional source of misfit is 4) a correlation between uentrjb, bennent and prstick which indicates a potential third component shaping answers to these three items.
*** The final source is the probability that non-anchor items that have been constrained to load on one dimension only (thus behave like anchor items) load on other latent dimensions. 
*** gvslvue and dfincac are potential candidates because gvsvlue asks about helping the "other" (unemployed workers) and because dfincac taps into beliefs about 
*** the role of inequality on incentives , which Svallfors has argued, is different from attitudes toward egalitarian redistribution. Indeed we find modification indices to be
*** high for these two items with gvslvue and dfincac potentially loading on the WELF dimension. 


sem (WELF1 -> sblazy sblwlka sblwcoa gvslvue dfincac) (WELF2 -> uentrjb bennent prtsick gvslvue dfincac) ///
(EQUA ->  gincdif_n smdfslv_n dfincac) (GOV ->  gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr), ///
stand cov(e.gvpdlwk*e.gvcldcr)  cov(e.gvslvol*e.gvhlthc) cov(e.sblwlka*e.sblwcoa)

estat gof, stats(all)
estat mindices


*** AIC 58612 which is very close to the 58478 of the saturated model. 

**** We have found the best fitting model, let's now look at the theoretical meaning by looking more closely at the loadings and their p-values. 
*** First, there is no reason to include dfincac as loading on both WELF and EQUA as the loading on WELF is substantively small and 
*** and barely significant. In addition, WELF1 and WELF2 are highly correlated (corr = 0.80) indicating that
*** constraining these items to load on the same dimension while resulting in a lower goodness of fit does not joeopardize interpretation of the data structure.
***  Allowing the items to load on the same dimension while letting the error terms between uentrjb, bennent and prstick covary is a better theoretical fit.

*** We thus obtain the final model : 

sem (WELF -> sblazy sblwlka sblwcoa gvslvue uentrjb bennent prtsick gvslvue) ///
(EQUA ->  gincdif_n smdfslv_n dfincac) (GOV ->  gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr), ///
stand cov(e.gvpdlwk*e.gvcldcr)  cov(e.gvslvol*e.gvhlthc) cov(e.sblwlka*e.sblwcoa) ///
cov(e.uentrjb*e.bennent) cov(e.prtsick*e.bennent) cov(e.uentrjb*e.prtsick) 

estat gof, stats(all)
estat mindices



** AIC is equal to 58667, compared to  58478 in the saturated model and  58612 in the best fitting model . In addition all loadings are significant 
*** and make theoretical sense.  The same strategy was used for Germany, UK and SE with similar results (the main difference being the size of the loading factor for 
*** dfincac though it was always much smaller than the item's loading on the EQUA dimension). We use this model and analyse the results in the main paper.  



*************************
*** Table 2. ************
*************************
*** "predict" cannot be used with inputed summary statistics (ssd) which is needed for the "sem" package in Stata
*** we first predict the factor scores not correcting for the ordinal nature of some of the items, working in a CFA framework. 


global COUNTRIES " FR SE GB DE" 
foreach country in $COUNTRIES {

preserve

keep if cntry == "`country'"


quiet sem (WELF -> sblazy sblwlka sblwcoa gvslvue uentrjb bennent prtsick gvslvue) ///
(EQUA ->  gincdif_n smdfslv_n dfincac) (GOV ->  gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr), ///
 stand cov(e.gvpdlwk*e.gvcldcr)  cov(e.gvslvol*e.gvhlthc) cov(e.sblwlka*e.sblwcoa) ///
 cov(e.uentrjb*e.bennent) cov(e.prtsick*e.bennent) cov(e.uentrjb*e.prtsick) 


quiet predict WELFcfa EQUAcfa GOVcfa, latent(WELF EQUA GOV)
regress lrscale  GOVcfa [pw = pweight], beta
regress lrscale  GOVcfa EQUAcfa [pw = pweight], beta
regress lrscale  GOVcfa EQUAcfa WELFcfa [pw = pweight], beta

clear all 

restore 
}

*** we also compute the factor scores in an EFA framework , with the polychoric matrices.
*** With EFA, scores on each dimensions are computed using all variables. 
*** The resulting scores are thus more correlated than in a CFA framework where some factor loading are constrained to 0. 
***This is thus a more conservative test of the relationship between each component and the left-right scale


global COUNTRIES " FR SE GB DE" 
foreach country in $COUNTRIES {


quiet polychoric sblazy sblwlka sblwcoa uentrjb bennent prtsick gincdif_n smdfslv_n dfincac gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr if cntry == "`country'"
mat polychR_`country' = r(R)

global polyN_`country' = r(sum_w)

quiet tabstat sblazy sblwlka sblwcoa uentrjb bennent prtsick gincdif_n smdfslv_n dfincac gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr ///
if cntry == "`country'", stat(mean) save

matrix mean_mat_`country' =r(StatTotal)

quiet tabstat sblazy sblwlka sblwcoa uentrjb bennent prtsick gincdif_n smdfslv_n dfincac gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr ///
if cntry == "`country'", stat(sd) save

matrix sd_mat_`country' =r(StatTotal)

}

*** FR

factormat polychR_FR , n($polyN_FR) factors(6) sds(sd_mat_FR) mean(mean_mat_FR)
rotate 
quiet predict WELFefa_FR GOVefa_FR EQUAefa_FR if cntry == "FR"
regress lrscale  GOVefa_FR if cntry == "FR" [pw = pweight], beta
regress lrscale  GOVefa_FR EQUAefa_FR if cntry == "FR" [pw = pweight], beta
regress lrscale  GOVefa_FR EQUAefa_FR WELFefa_FR if cntry == "FR" [pw = pweight], beta


*** DE (WELF is second factor not first) 

factormat polychR_DE , n($polyN_DE) factors(6) sds(sd_mat_DE) mean(mean_mat_DE)
rotate  
quiet predict GOVefa_DE WELFefa_DE EQUAefa_DE if cntry == "DE"
regress lrscale  GOVefa_DE if cntry == "DE" [pw = pweight], beta
regress lrscale  GOVefa_DE EQUAefa_DE if cntry == "DE" [pw = pweight], beta
regress lrscale  GOVefa_DE EQUAefa_DE WELFefa_DE if cntry == "DE" [pw = pweight], beta



*** UK


factormat polychR_GB , n($polyN_GB) factors(6) sds(sd_mat_GB) mean(mean_mat_GB)
rotate  
quiet predict WELFefa_GB GOVefa_GB EQUAefa_GB if cntry == "GB"
regress lrscale  GOVefa_GB if cntry == "GB" [pw = pweight], beta
regress lrscale  GOVefa_GB EQUAefa_GB if cntry == "GB" [pw = pweight], beta
regress lrscale  GOVefa_GB EQUAefa_GB WELFefa_GB if cntry == "GB" [pw = pweight], beta



*** SE (WELF is second factor not first) 

factormat polychR_SE , n($polyN_SE) factors(6) sds(sd_mat_SE) mean(mean_mat_SE)
rotate  
quiet predict GOVefa_SE WELFefa_SE EQUAefa_SE if cntry == "SE"
regress lrscale  GOVefa_SE if cntry == "SE" [pw = pweight], beta
regress lrscale  GOVefa_SE EQUAefa_SE if cntry == "SE" [pw = pweight], beta
regress lrscale  GOVefa_SE EQUAefa_SE WELFefa_SE if cntry == "SE" [pw = pweight], beta

*** results shown in the paper are with the CFA scores but overal results are the same 

*** interpretation: with scores computed in a CFA framework, the predictive power of GOV disappears once EQUA is included in the regression
*** with scores computed in an EFA framework, while GOV is still significant , in all cases the standardized coefficient is always less than half
*** the size of the EQUA coefficient. Even with just three imperfect measurement items, the EQUA factor score is still more predictive than the GOV factor score


**** we also tested our results using generalized structural modelling allowing us
*** to test our framework using different link functions between the latent factors and the survey items

gsem (WELF -> sblazy sblwlka sblwcoa uentrjb bennent prtsick, ologit) (WELF -> gvslvue) ///
(EQUA -> gincdif_n smdfslv_n dfincac, ologit) (GOV -> gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr)///
if cntry == "SE", var(RT@1) var(EGA@1) var(GOV@1)


gsem (WELF -> sblazy sblwlka sblwcoa uentrjb bennent prtsick, ologit) (WELF -> gvslvue) ///
(EQUA -> gincdif_n smdfslv_n dfincac, ologit) (GOV -> gvjbevn gvslvue gvslvol gvhlthc gvpdlwk gvcldcr)///
if cntry == "FR", var(RT@1) var(EGA@1) var(GOV@1)

*** we find the same pattern in the relative sizes of the coefficients and the same covariance estimates for the relationship
*** between the different components

************************************************************
************************** Part XXX.1.b  *********************
************************************************************

**** Dataset to use : bsas_imputed_2004.dta

** variable coding: all questions have been recoded from "liberal" answer (lowest score) to "conservative" answer (highest score) 
** several items are not available as such in the data. The way they were computed are described below:  
** a) the item that measures support for withdrawing unemployment benefits from the non-working poor,  "exclusion_un",  has been computed as follow: 

gen exclusion_un = 0
replace exclusion_un = 1 if exclusion_un == 0 &  acunwshy == 1
replace exclusion_un = . if acunwshy == .
replace exclusion_un = 2 if exclusion_un == 1 &  acunpoor == 1 
replace exclusion_un = . if acunpoor == . & exclusion_un != 1
replace exclusion_un = 3 if exclusion_un == 2 & acunchil == 1
replace exclusion_un = .  if acunchil == .  & exclusion_un != 2

** the item that measures support for withdrawing unemployment benefits from immigrants, "welf_chavun" has been computed as follow: 


gen welf_chavun = 0
replace welf_chavun = 1 if welf_chavun == 0 &  acunbrec  == 1
replace welf_chavun = . if acunbrec  == .
replace welf_chavun = 2 if welf_chavun == 1 &  acunnbbr == 1 
replace welf_chavun = . if acunnbbr == . & welf_chavun != 1
replace welf_chavun = 3 if welf_chavun == 2 & acunrefu == 1
replace welf_chavun = .  if acunrefu == .  & exclusion_un != 2


**** the item that measures support for letting the rich access private health/education and pension services and decrease their contribution to the public one was computed as follow: 

recode taxrpenv2 (1=0) ( 2 3 = 1) ( 8 9 = .), gen(pen)
recode taxrnhsv2 (1=0) ( 2 3 = 1) ( 8 9 = .), gen(nhs)
recode taxreducv2 (1=0) ( 2 3 = 1) ( 8 9 = .), gen(educ)
gen depool = pen + nhs + educ

** the item used to measure tolerance for rich receiving better health services and education was computed as follow (Cronbach alpha of 0.93

alpha buyhlthv2 buyeducnv2,  gen(ineq_acc)
replace ineq_acc = . if buyhlthv2 == . | buyeducnv2 == . 

** because the whyneedv2 question is hard to interpret, we singled out the "lazy answer" turning whyneedv2 into a dichotomous item  

recode whyneedv2 (1 = 0) ( 2 3 = 0) ( 4 = 1), gen(whyneedv5)


*** create two sub-samples, one for the EFA, the other for the CFA

use "/Users/XXX/bsas_2004_for_rep_FINAL.dta"
sample 50 
save "/XXX/bsas_2004_for_rep_FINAL_sample_1.dta", replace
clear all
use "/Users/XXX/bsas_2004_for_rep_FINAL.dta"
merge 1:1 serial using "/Users/XXX/bsas_2004_for_rep_FINAL_sample_1.dta"
keep if _merge == 1
save "/XXX/bsas_2004_for_rep_FINAL_sample_2.dta", replace



*************************
*** Table 3. ************
*************************

use "/XXX/bsas_2004_for_rep_FINAL_sample_1.dta"

polychoric dolev2 welfhelpv2 falseclmv2 welffeetv2 sochelpv2 unempjobv2 dolefidlv2 whyneedv5 ////
damlivesv2 benfrsecv majtaxv2 soctaxv2 nousesecv2  morewelfv2 socspnd1v2 socspnd4v2 ////
welf_chavun exclusion_un indust4v2 richlawv2 wealthv2 ineqrichv2  incomgapv2 incdiffv2 diffsnecv2 ///
incdiffsv2 redistrbv2 redist2v2 taxraisev2 ineq_acc depool taxspendv2

display r(sum_w)
global N = r(sum_w)
matrix r = r(R)


factormat r, n($N) factors(4) 


*** rotate using oblique rotation 

rotate, oblique oblimin

*** check the orthogonality assumption 

estat common

*** correlation is very low,, factor loadings can be understood as if having used an orthogonal rotation 

factormat r, n($N) factors(10) 
rotate, varimax  

*** same results with the exceptions of lower loadings on majtaxv2 and nousesecv2


*** do a CFA on sample 2
*** see appendix for the results 


clear all
use "/XXX/bsas_2004_for_rep_FINAL_sample_2.dta"

do "/Users/XXXXXX/ssd_bsas_cross_sec_final.do"



*** saturated model 

sem(<- dolev2 welfhelpv2 falseclmv2 welffeetv2 sochelpv2 unempjobv2 dolefidlv2 whyneedv5 ////
damlivesv2 benfrsecv2 socspnd1v2 exclusion_un indust4v2 richlawv2 wealthv2 ineqrichv2 incdiffv2 ///
incdiffsv2 redistrbv2 redist2v2 ), stand

estat gof, stat(all) 

*** AIC :  BIC  :  see appendix 

** unidimensional: 

sem(UNI -> dolev2 welfhelpv2 falseclmv2 welffeetv2 sochelpv2 unempjobv2 dolefidlv2 whyneedv5 ////
damlivesv2 benfrsecv2 socspnd1v2 exclusion_un indust4v2 richlawv2 wealthv2 ineqrichv2 incdiffv2 ///
incdiffsv2 redistrbv2 redist2v2 ), stand 

estat gof, stat(all) 

*** AIC :  BIC  :  RSMS:  see appendix 

*** two-dimensional: 

sem(RT -> dolev2 welfhelpv2 falseclmv2 welffeetv2 sochelpv2 unempjobv2 dolefidlv2 whyneedv5 ////
damlivesv2 benfrsecv2 socspnd1v2 exclusion_un) (RF -> indust4v2 richlawv2 wealthv2 ineqrichv2 incdiffv2 ///
incdiffsv2 redistrbv2 redist2v2 ), stand 


estat gof, stat(all) 

*** AIC :  BIC  :  RSMS: see appendix 

*** looking for cross factor loading

estat mindices

sem(RT -> dolev2 welfhelpv2 falseclmv2 welffeetv2 sochelpv2 unempjobv2 dolefidlv2 whyneedv5 ////
damlivesv2 benfrsecv2 socspnd1v2 exclusion_un redistrbv2 redist2v2) (RF -> indust4v2 richlawv2 wealthv2 ineqrichv2 incdiffv2 ///
incdiffsv2 redistrbv2 redist2v2 damlivesv2 benfrsecv2), stand 

estat gof, stat(all) 

*** AIC :  BIC  :  RSMS: see appendix 

*** looking for error correlations  

estat mindices


sem(RT -> dolev2 welfhelpv2 falseclmv2 welffeetv2@1 sochelpv2 unempjobv2 dolefidlv2 whyneedv5 ////
damlivesv2 benfrsecv2 socspnd1v2 exclusion_un redistrbv2 redist2v2) (RF -> indust4v2 richlawv2 wealthv2@1 ineqrichv2 incdiffv2 ///
incdiffsv2 redistrbv2 redist2v2 damlivesv2 benfrsecv2), stand cov(e.incdiffv2*e.incdiffsv2) cov(e.dolev2*e.socspnd1v2) ///
 cov(e.redistrbv2*e.incdiffv2) cov(e.sochelpv2*e.dolefidlv2) ///
 cov(e.indust4v2*e.richlawv2) cov(e.falseclmv2*e.dolefidlv2)  cov(e.dolev2*e.whyneedv5)  ///
 cov(e.indust4v2*e.wealthv2) cov(e.richlawv2*e.wealthv2)

 estat gof, stat(all) 
 
 
*** AIC :  BIC  :  RSMS:  see appendix 


*** using the survey items asked repeatedly over time, we run a CFA for each survey year, check the changes in loadings for the two redistribution items 

**** Dataset : bsas83_11_for_rep.dta

** run it year by year 

do "/Users/XXXX/ssd_bsas_longitudinal.do"

** basic model 

sem (RT -> sochelpV2 unempjobV2 dolefidlV2 doleV2 welfhelpV2 welffeetV2  ) ///
(RF -> wealthV2 indust4V2 richlawV2 redistrbV2 ),  stand

** we add the following error correlation estimates: cov(e.indust4V2*e.richlawV2) ///
***cov(e.sochelpV2*e.dolefidlV2) cov(e.doleV2*e.welffeetV2)

estat gof, stat(all) 
estat mindices


*** doleV2 and dolefidlV2 often have large mofidication indices, the final model
** that we use to compare each survey years allows for doleV2 to load on RF and redistrbV2 to load on RT


sem (RT -> sochelpV2 unempjobV2 dolefidlV2 doleV2 welfhelpV2 welffeetV2 redistrbV2 ) ///
(RF -> wealthV2 indust4V2 richlawV2 redistrbV2 doleV2), stand cov(e.indust4V2*e.richlawV2) ///
cov(e.sochelpV2*e.dolefidlV2) cov(e.doleV2*e.welffeetV2)


*** results in the appendix 



************************************************************
************************** Part XXX.2  *********************
************************************************************

**** Dataset : bsas83_11_for_rep.dta

*** building the indices for RF and RT

*************
*** method 1 : simple additive scale
*************

*** Reliability using Cronbach's alpha , reliability is higher without incomgap which is a borderline item

sort index
by index : alpha wealthV2 indust4V2 richlawV2 redistrbV2 if index != . , i
by index : alpha doleV2 sochelpV2 unempjobV2 dolefidlV2 welfhelpV2 welffeetV2  if index != . , i

*** Reliability using Mokken scale procedue 

local i=1
while `i' <=16 {

preserve

keep if index == `i'

loevh wealthV2 indust4V2 richlawV2 redistrbV2 incomgapV2, monot(minsize(30)) nip(*)
msp wealthV2 indust4V2 richlawV2 redistrbV2 incomgapV2


loevh doleV2 sochelpV2 unempjobV2 dolefidlV2 welfhelpV2 welffeetV2 , monot(minsize(30)) nip(*)
msp doleV2 sochelpV2 unempjobV2 dolefidlV2 welfhelpV2 welffeetV2 

local i = `i'+1
}

*** while Cronbach's alpha includes incomgap, the Mokken procedure discards it, but keep the redistribution question 

*** ADDITIVE SCALES


gen welfhelpV25 = welfhelpV2
recode welfhelpV25 (1 = 0) (2= 0.25) (3 = 0.5) ( 4= 0.75) (5=1) (.=.)
gen indust4V25 = indust4V2
recode indust4V25 (1 = 0) (2= 0.25) (3 = 0.5) ( 4= 0.75) (5=1) (.=.)
gen welffeetV25 = welffeetV2
recode welffeetV25 (1 = 0) (2= 0.25) (3 = 0.5) ( 4= 0.75) (5=1) (.=.)
gen sochelpV25 = sochelpV2
recode sochelpV25 (1 = 0) (2= 0.25) (3 = 0.5) ( 4= 0.75) (5=1) (.=.)
gen richlawV25 = richlawV2
recode richlawV25 (1 = 0) (2= 0.25) (3 = 0.5) ( 4= 0.75) (5=1) (.=.) 
gen redistrbV25 = redistrbV2
recode redistrbV25 (1 = 0) (2= 0.25) (3 = 0.5) ( 4= 0.75) (5=1) (.=.) 
gen wealthV25 = wealthV2
recode wealthV25 (1 = 0) (2= 0.25) (3 = 0.5) ( 4= 0.75) (5=1) (.=.) 
gen unempjobV25 = unempjobV2
recode unempjobV25 (1 = 0) (2= 0.25) (3 = 0.5) ( 4= 0.75) (5=1) (.=.) 
gen dolefidlV25 = dolefidlV2
recode dolefidlV25 (1 = 0) (2= 0.25) (3 = 0.5) ( 4= 0.75) (5=1) (.=.)
gen incomgapV25 = incomgapV2
recode incomgapV25 (1 = 0) (2= 0.5) (3=1) (.=.)
gen doleV25 = doleV2
recode doleV25 (1 = 0) (2= 0.5) (3 = 1)  (.=.)


gen RF_index =(indust4V25 + richlawV25 + redistrbV25 + wealthV25 )/4

gen RT_index= (welfhelpV25 + doleV25 +  welffeetV25 + sochelpV25 + unempjobV25 + dolefidlV25  )/6

*************************
***  ***********
*************************

egen meanRT = mean(RT_index), by(year)
egen meanRF = mean(RF_index), by(year)
sort year

*** 95 % CI 

egen countRFindex = count(RF_index), by(year)
egen SDRFindex = sd(RF_index), by(year) 
gen SE_RFindex = SDRFindex/(countRFindex)^(1/2)
gen RFindex_UB = meanRF + 1.96 * SE_RFindex
gen RFindex_LB = meanRF - 1.96 * SE_RFindex

egen countRTindex = count(RT_index), by(year)
egen SDRTindex = sd(RT_index), by(year) 
gen SE_RTindex = SDRTindex/(countRTindex)^(1/2)
gen RTindex_UB = meanRT + 1.96 * SE_RTindex
gen RTindex_LB = meanRT - 1.96 * SE_RTindex


twoway connected RTindex_LB meanRT RTindex_UB RFindex_LB meanRF RFindex_UB year 

*************************
*** Figure 1. ***********
*************************

egen meanRF_Q1 = mean(RF_index) if hhincqE == 1, by(year)
egen meanRF_Q2 = mean(RF_index) if hhincqE == 2, by(year)
egen meanRF_Q3 = mean(RF_index) if hhincqE == 3, by(year)
egen meanRF_Q4 = mean(RF_index) if hhincqE == 4, by(year)
egen meanRF_Q5 = mean(RF_index) if hhincqE == 5, by(year)

egen meanRT_Q1 = mean(RT_index) if hhincqE == 1, by(year)
egen meanRT_Q2 = mean(RT_index) if hhincqE == 2, by(year)
egen meanRT_Q3 = mean(RT_index) if hhincqE == 3, by(year)
egen meanRT_Q4 = mean(RT_index) if hhincqE == 4, by(year)
egen meanRT_Q5 = mean(RT_index) if hhincqE == 5, by(year)


*** 95 % CI 


egen countRF_Q1 = count(RF_index) if hhincqE == 1, by(year)
egen countRF_Q2 = count(RF_index) if hhincqE == 2, by(year) 
egen countRF_Q3 = count(RF_index) if hhincqE == 3, by(year) 
egen countRF_Q4 = count(RF_index) if hhincqE == 4, by(year) 
egen countRF_Q5 = count(RF_index) if hhincqE == 5, by(year) 

egen SDRF_Q1 = sd(RF_index) if hhincqE == 1, by(year)
egen SDRF_Q2 = sd(RF_index) if hhincqE == 2, by(year) 
egen SDRF_Q3 = sd(RF_index) if hhincqE == 3, by(year) 
egen SDRF_Q4 = sd(RF_index) if hhincqE == 4, by(year) 
egen SDRF_Q5 = sd(RF_index) if hhincqE == 5, by(year)

gen SE_RF_Q1 = SDRF_Q1/(countRF_Q1)^(1/2)
gen SE_RF_Q2 = SDRF_Q2/(countRF_Q2)^(1/2)
gen SE_RF_Q3 = SDRF_Q3/(countRF_Q3)^(1/2)
gen SE_RF_Q4 = SDRF_Q4/(countRF_Q4)^(1/2)
gen SE_RF_Q5 = SDRF_Q5/(countRF_Q5)^(1/2)

gen RF_LB_Q1 = meanRF_Q1 - 1.96 * SE_RF_Q1
gen RF_LB_Q2 = meanRF_Q2 - 1.96 * SE_RF_Q2
gen RF_LB_Q3 = meanRF_Q3 - 1.96 * SE_RF_Q3
gen RF_LB_Q4 = meanRF_Q4 - 1.96 * SE_RF_Q4
gen RF_LB_Q5 = meanRF_Q5 - 1.96 * SE_RF_Q5

gen RF_UB_Q1 = meanRF_Q1 + 1.96 * SE_RF_Q1
gen RF_UB_Q2 = meanRF_Q2 + 1.96 * SE_RF_Q2
gen RF_UB_Q3 = meanRF_Q3 + 1.96 * SE_RF_Q3
gen RF_UB_Q4 = meanRF_Q4 + 1.96 * SE_RF_Q4
gen RF_UB_Q5 = meanRF_Q5 + 1.96 * SE_RF_Q5 
 
egen countRT_Q1 = count(RT_index) if hhincqE == 1, by(year)
egen countRT_Q2 = count(RT_index) if hhincqE == 2, by(year) 
egen countRT_Q3 = count(RT_index) if hhincqE == 3, by(year) 
egen countRT_Q4 = count(RT_index) if hhincqE == 4, by(year) 
egen countRT_Q5 = count(RT_index) if hhincqE == 5, by(year) 

egen SDRT_Q1 = sd(RT_index) if hhincqE == 1, by(year)
egen SDRT_Q2 = sd(RT_index) if hhincqE == 2, by(year) 
egen SDRT_Q3 = sd(RT_index) if hhincqE == 3, by(year) 
egen SDRT_Q4 = sd(RT_index) if hhincqE == 4, by(year) 
egen SDRT_Q5 = sd(RT_index) if hhincqE == 5, by(year)

gen SE_RT_Q1 = SDRT_Q1/(countRT_Q1)^(1/2)
gen SE_RT_Q2 = SDRT_Q2/(countRT_Q2)^(1/2)
gen SE_RT_Q3 = SDRT_Q3/(countRT_Q3)^(1/2)
gen SE_RT_Q4 = SDRT_Q4/(countRT_Q4)^(1/2)
gen SE_RT_Q5 = SDRT_Q5/(countRT_Q5)^(1/2)

gen RT_LB_Q1 = meanRT_Q1 - 1.96 * SE_RT_Q1
gen RT_LB_Q2 = meanRT_Q2 - 1.96 * SE_RT_Q2
gen RT_LB_Q3 = meanRT_Q3 - 1.96 * SE_RT_Q3
gen RT_LB_Q4 = meanRT_Q4 - 1.96 * SE_RT_Q4
gen RT_LB_Q5 = meanRT_Q5 - 1.96 * SE_RT_Q5

gen RT_UB_Q1 = meanRT_Q1 + 1.96 * SE_RT_Q1
gen RT_UB_Q2 = meanRT_Q2 + 1.96 * SE_RT_Q2
gen RT_UB_Q3 = meanRT_Q3 + 1.96 * SE_RT_Q3
gen RT_UB_Q4 = meanRT_Q4 + 1.96 * SE_RT_Q4
gen RT_UB_Q5 = meanRT_Q5 + 1.96 * SE_RT_Q5



twoway connected RF_UB_Q1 meanRF_Q1 RF_LB_Q1 RF_LB_Q3 meanRF_Q3 RF_UB_Q3 RF_LB_Q5 meanRF_Q5 RF_UB_Q5 year
twoway connected RT_UB_Q1 meanRT_Q1 RT_LB_Q1 RT_LB_Q3 meanRT_Q3 RT_UB_Q3 RT_LB_Q5 meanRT_Q5 RT_UB_Q5 year 

twoway connected meanRF_Q1 meanRF_Q3 meanRF_Q5 year if year > 1986
twoway connected meanRT_Q1 meanRT_Q3 meanRT_Q5 year if year > 1986
twoway connected RT_UB_Q1 meanRT_Q1 RT_LB_Q1 RT_LB_Q3 meanRT_Q3 RT_UB_Q3 RT_LB_Q5 meanRT_Q5 RT_UB_Q5 year 




